library(readODS) #read ods files
library(readxl) #reads excel files
library(tidyverse) #Tidy packages
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr) #lots of functions - data manipulation
library(usethis) #look at and apply settings to Github actions
library(ggplot2) #pretty plots
library(ggthemes) #themes for ggplot
library(RColorBrewer) #color palettes
library(wesanderson) #color palettes from Wes Anderson films
library(scales) #axis formatting options
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
library(plotly) #interactive web graphs
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(networkD3) #interactive web network graphs
library(flipPlots) #plots (install with Github)
usethis::edit_r_environ() #set environment with Github token for API
## • Edit '/Users/kaydeebarker/.Renviron'
## • Restart R for changes to take effect
Introduction to the inventory, with links kilotonnes of CO2 equivalents
# Map loop to download UK 2022 data from the UK Department for Energy Security and Net Zero
downloaded <- file.exists("UKGHG_2022.ods") #checks if file is downloaded in working directory
if(downloaded != T){ #if downloaded is not true
map2("https://assets.publishing.service.gov.uk/media/65c0d54663a23d000dc821ca/final-greenhouse-gas-emissions-2022-by-source-dataset.ods", #update this link when new data available
"UKGHG_2022.ods", download.file)} else{print('data downloaded')} #name and download or print
## [1] "data downloaded"
#Read in UK 2022 data from the UK Department for Energy Security and Net Zero
GHG_UK22 <- read_ods(
path = "UKGHG_2022.ods",
sheet = 1, #define tab/sheet to read
col_names = TRUE, #use header row for column names
col_types = NULL, #guess data types
na = "", #treat blank cells as NA
skip = 0, #don't skip rows
formula_as_formula = FALSE, #values only
range = NULL,
row_names = FALSE, #no row names
strings_as_factors = TRUE) %>% #use factors
rename("Subsector" = "TES Subsector") #rename variable
#Create dataframe with only categorization
GHG_categories <- GHG_UK22[, c(3,6:12)] %>%
distinct()
#IPCC code and TES subsectors - for categorization
GHG_IPCC_TES <- GHG_UK22[, c(3,6,7)] %>%
distinct() %>%
rename("IPCC_code" = "IPCC Code") #rename variable to match other df
# Map loop to download UK Regional data 1990-2021 from the UK National Atmospheric Emissions Inventory
downloaded2 <- file.exists("GHG_regions.xlsx") #checks if file is downloaded in working directory
if(downloaded2 != T){ #if downloaded is not true
map2("https://uk-air.defra.gov.uk/reports/cat09/2306200930_DA_GHGI_1990-2021_Final_v3.1.xlsx", #update this link when new data available
"GHG_regions.xlsx", download.file)} else{print('data downloaded')} #name and download or print
## [1] "data downloaded"
#Read in excel file, by source, with region
GHG_regions <- read_xlsx(
"GHG_regions.xlsx", #excel file from current working directory
sheet = "BySource_data", #define tab/sheet to read
col_names = TRUE, #use header row for column names
col_types = NULL, #guess data types
na = "", #treat blank cells as NA
trim_ws = TRUE, #trim any whitespace/unused columns and rows
skip = 6, #skip 6 rows to get to pivot table data
n_max = Inf, #set maximum number of rows to include
guess_max = min(1000, Inf), #how many rows to use to guess data types
progress = readxl_progress(), #display progress in reading in data
.name_repair = "unique" #makes sure all column names not empty or duplicated
)
#Add subsector into GHG_regions, match format
GHG_regions2 <- inner_join(GHG_IPCC_TES, GHG_regions,
by='IPCC_code') %>% #use inner join to combine dataframes by a shared variable
rename("GHG" = "Pollutant") %>% #rename variable to match UK only csv
rename("NC_Sector" = "NCFormat") %>% #rename variable
rename("TES_Sector" = "TES Sector") #rename variable
#Simplify regional dataset
GHG_simp <- GHG_regions2 %>%
group_by(EmissionYear, TES_Sector, Subsector, RegionName, GHG) %>%
summarize(Temissions = sum(Emission, na.rm = TRUE)) %>%
rename("Sector" = "TES_Sector")
## `summarise()` has grouped output by 'EmissionYear', 'TES_Sector', 'Subsector',
## 'RegionName'. You can override using the `.groups` argument.
#2021 only
GHG_2021 <- GHG_simp %>%
filter(EmissionYear == "2021") #filter to only 2021
#Grouped into all of UK
GHG_2021_joined <- GHG_2021 %>%
group_by(Sector, Subsector, GHG) %>%
summarize(Temissions = sum(Temissions, na.rm = TRUE))
## `summarise()` has grouped output by 'Sector', 'Subsector'. You can override
## using the `.groups` argument.
#Define custom color palette
colors <- paste(c('#543005','#8c510a','#bf812d','#dfc27d','#f6e8c3','#f5f5f5','#c7eae5','#80cdc1','#35978f','#01665e',
'#003c30','#40004b','#762a83','#9970ab','#c2a5cf','#e7d4e8','#f7f7f7','#d9f0d3','#a6dba0','#5aae61',
'#1b7837','#00441b','#67001f','#b2182b','#d6604d','#f4a582','#fddbc7','#f7f7f7','#d1e5f0','#92c5de',
'#4393c3','#2166ac','#053061','#c51b7d','#de77ae','#f1b6da','#fde0ef','#e6f5d0','#b8e186','#7fbc41',
'#4d9221'), collapse = '", "')
colorJS <- paste('d3.scaleOrdinal(["', colors, '"])') #format to call Javascript D3
#Set up Sankey links dataframe - from sector to subsector to GHGs
links <- data.frame(source=c(paste0(GHG_2021_joined$Sector), paste0(GHG_2021_joined$Subsector)),
target=c(paste0(GHG_2021_joined$Subsector), paste0(GHG_2021_joined$GHG)),
value=as.numeric(paste0(GHG_2021_joined$Temissions)))
links <- links[-c(83:85),] #remove instances with repeat name
#Create nodes df from names in links df
nodes <- data.frame(
name=unique(c(as.character(links$source),
as.character(links$target))))
#Add ID numbers
links$IDsource <- as.numeric(match(links$source, nodes$name)-1)
links$IDtarget <- as.numeric(match(links$target, nodes$name)-1)
#Set up Sankey links dataframe - breakdown from GHGs to sectors to subsectors
links2 <- data.frame(source=c(paste0(GHG_2021_joined$GHG), paste0(GHG_2021_joined$Sector)),
target=c(paste0(GHG_2021_joined$Sector), paste0(GHG_2021_joined$Subsector)),
value=as.numeric(paste0(GHG_2021_joined$Temissions)))
links2 <- links2[-c(168:170),] #remove instances with repeat name
#Create nodes df from names in links df
nodes2 <- data.frame(
name=unique(c(as.character(links2$source),
as.character(links2$target))))
#Add ID numbers
links2$IDsource <- as.numeric(match(links2$source, nodes2$name)-1)
links2$IDtarget <- as.numeric(match(links2$target, nodes2$name)-1)
#Plot with networkD3
sankey <- sankeyNetwork(Links = links2, Nodes = nodes2, #dataframes to use
Source = "IDsource", Target = "IDtarget", #numeric identifiers of variables
Value = "value", #numeric weights
NodeID = "name", #node labels
units = "kt CO2 eq.", #label units
colourScale = colorJS,
fontSize = 7,
sinksRight=FALSE, #justify last variables right if true
nodePadding = 8, #vertical space between nodes
iterations = 10) #number of times for it to try to find the best order and format
sankey #view plot
Sankey diagram demonstrating proportional breakdown of greenhouse gas emissions in the UK in 2021. From left to right, greenhouse gas by sector by subsector. Proportions based on kilotonnes of CO2 equivalents.
#Plot with Plotly
sankey3 <- plot_ly(type = "sankey",
domain = list(x = c(0,1),y = c(0,1)),
orientation = "h",
valueformat = ".0f",
valuesuffix = "kt CO2 eq.",
node = list(
label = nodes$name,
pad = 15,
thickness = 20,
line = list(color = "black", width = 0.5),
link = list(
source = links$IDsource,
target = links$IDtarget,
value = links$value
#label = linklist$label
))) %>%
layout(
title = "Greenhouse Gas Emissions Per Sector in the UK",
font = list(size = 10),
xaxis = list(showgrid = F, zeroline = F),
yaxis = list(showgrid = F, zeroline = F))
sankey3
#Plot time series
Sect_time <- GHG_regions2 %>%
group_by(EmissionYear, TES_Sector) %>%
summarize(Temissions = sum(Emission, na.rm = TRUE)) %>%
rename("Sector" = "TES_Sector") %>%
ggplot(aes(x=as.numeric(EmissionYear), y=Temissions, color=Sector)) +
geom_line() +
xlab("") + ylab("Greenhouse Gas Emissions (kilotonnes CO2 eq.)") +
ggtitle("Greenhouse Gas Emissions of the UK per Sector Over Time") +
theme_few() +
scale_y_continuous(labels = comma) +
scale_x_continuous(breaks = pretty(c(1990:2020), n=6)) +
scale_color_brewer(palette = "Set2") +
theme(legend.position="bottom")
## `summarise()` has grouped output by 'EmissionYear'. You can override using the
## `.groups` argument.
#Sect_time #view ggplot
#Make interactive with Plotly
t_int <- ggplotly(Sect_time) %>% #convert to interactive graph
layout(legend = list(
orientation = "h"))
#Plotly customization
style(t_int, hovertemplate="Year: %{x:,.r}
CO2eq: %{y:,.4r} kt") #define what hover label says, round x values, round y values to 4 sig. figs
Greenhouse gas emissions in kilotonnes of CO2 equivalents in the UK over time, by sector.
ag_df <- GHG_regions2 %>%
filter(TES_Sector == "Agriculture") %>% #filter sector
filter(EmissionYear == "2021") %>% #filter to only 2021
group_by(Subsector, IPCC_name, GHG, IPCC_code) %>%
summarize(Temissions = sum(Emission, na.rm = TRUE))
## `summarise()` has grouped output by 'Subsector', 'IPCC_name', 'GHG'. You can
## override using the `.groups` argument.
ag_df$IPCC_name <- gsub('_', ' ', ag_df$IPCC_name) #replace underscores with spaces to make nicer to read
#Set up Sankey links dataframe
aglinks <- data.frame(source=c(paste0(ag_df$GHG), paste0(ag_df$Subsector)),
target=c(paste0(ag_df$Subsector), paste0(ag_df$IPCC_name)),
value=as.numeric(paste0(ag_df$Temissions)))
#Create nodes df from names in links df
agnodes <- data.frame(
name=unique(c(as.character(aglinks$source),
as.character(aglinks$target))))
#Add ID numbers
aglinks$IDsource <- as.numeric(match(aglinks$source, agnodes$name)-1)
aglinks$IDtarget <- as.numeric(match(aglinks$target, agnodes$name)-1)
#Plot with networkD3
agsankey <- sankeyNetwork(Links = aglinks, Nodes = agnodes,
Source = "IDsource", Target = "IDtarget",
Value = "value", NodeID = "name",
units = "kilotonnes CO2 eq.",
sinksRight=FALSE, nodePadding = 8,
iterations = 5)
agsankey
Sankey diagram demonstrating breakdown of greenhouse gas emissions in the UK’s agricultural sector in 2021. From left to right, greenhouse gas by subsector by individual source, following IPCC source names.
#Bar plot by subsector, stacked by GHG
ggplot(ag_df, aes(x = Subsector, y = Temissions, fill = GHG)) +
geom_col() +
theme_few() + #remove grid, white background
scale_fill_manual(values = wes_palette("GrandBudapest1")) +
xlab("Subsector") + ylab("Emissions (kt CO2 eq.)") #define/change X and Y labels
Bar graph showing greenhouse gas emissions in kilotonnes of CO2 equivalents in the UK’s agricultural sector in 2021 by subsector, colored by greenhouse gas identity.
#Will make donut or pie chart here
#Bar plot by source
ggplot(ag_df, aes(x = IPCC_name, y = Temissions, fill = Subsector)) +
geom_bar(stat = "identity", width = 0.7, position = "dodge") +
theme_few() + #remove grid, white background
scale_fill_manual(values = wes_palette("GrandBudapest1")) +
scale_x_discrete(guide = guide_axis(angle = 45)) +
ggtitle("Agriculture emissions in 2021") + #add a graph title
xlab("") + ylab("Emissions (kt CO2 eq.)") + #define/change X and Y labels
theme(legend.position="top", legend.box = "horizontal",
text=element_text(size=10, colour = "black"),
axis.text.x = element_text(size=8, colour = "black"))
Bar graph showing greenhouse gas emissions in kilotonnes of CO2 equivalents in the UK’s agricultural sector in 2021 by source, colored by subsector.
#Bar plot by source, facet by GHG
ggplot(ag_df, aes(x = IPCC_code, y = Temissions, fill = Subsector)) +
geom_bar(stat = "identity", width = 0.7, position = "dodge") + facet_grid(~ GHG) +
theme_few() + #remove grid, white background
scale_fill_manual(values = wes_palette("GrandBudapest1")) +
scale_x_discrete(guide = guide_axis(angle = 90)) +
ggtitle("Agriculture emissions in 2021") + #add a graph title
xlab("Source IPCC Code") + ylab("Emissions (kt CO2 eq.)") + #define/change X and Y labels
theme(text=element_text(size=8, colour = "black"))
Bar graph showing greenhouse gas emissions in kilotonnes of CO2 equivalents in the UK’s agricultural sector in 2021 by IPCC code, colored by subsector and grouped by greenhouse gas identity.
#Bar plot combustion, stacked by GHG
ag_df %>%
filter(Subsector == "Agricultural combustion") %>%
ggplot(aes(x = IPCC_name, y = Temissions, fill = GHG)) +
geom_col() +
theme_few() + #remove grid, white background
scale_fill_manual(values = wes_palette("GrandBudapest1")) +
scale_x_discrete(guide = guide_axis(angle = 45)) +
ggtitle("Subsector emissions in 2021") + #add a graph title
xlab("") + ylab("Emissions (kt CO2 eq.)") + #define/change X and Y labels
theme(legend.position="left", legend.box = "vertical",
axis.text.x = element_text(size=8, colour = "black"))
Bar graph showing greenhouse gas emissions in kilotonnes of CO2 equivalents in the agricultural combustion subsector in 2021 by source, colored by greenhouse gas identity.
#Bar plot soils, stacked by GHG
ag_df %>%
filter(Subsector == "Agricultural soils") %>%
ggplot(aes(x = IPCC_name, y = Temissions, fill = GHG)) +
geom_col() +
theme_few() + #remove grid, white background
scale_fill_manual(values = wes_palette("GrandBudapest1")) +
scale_x_discrete(guide = guide_axis(angle = 45)) +
ggtitle("Subsector emissions in 2021") + #add a graph title
xlab("") + ylab("Emissions (kt CO2 eq.)") + #define/change X and Y labels
theme(legend.position="left", legend.box = "vertical",
axis.text.x = element_text(size=8, colour = "black"))
Bar graph showing greenhouse gas emissions in kilotonnes of CO2 equivalents in the agricultural soils subsector in 2021 by source, colored by greenhouse gas identity.
#Bar plot livestock, stacked by GHG
ag_df %>%
filter(Subsector == "Livestock") %>%
ggplot(aes(x = IPCC_name, y = Temissions, fill = GHG)) +
geom_col() +
theme_few() + #remove grid, white background
scale_fill_manual(values = wes_palette("GrandBudapest1")) +
scale_x_discrete(guide = guide_axis(angle = 45)) +
ggtitle("Subsector emissions in 2021") + #add a graph title
xlab("") + ylab("Emissions (kt CO2 eq.)") + #define/change X and Y labels
theme(legend.position="left", legend.box = "vertical",
axis.text.x = element_text(size=8, colour = "black"))
Bar graph showing greenhouse gas emissions in kilotonnes of CO2 equivalents in the livestock subsector in 2021 by source, colored by greenhouse gas identity.
#Bar plot other ag, stacked by GHG
ag_df %>%
filter(Subsector == "Other agriculture") %>%
ggplot(aes(x = IPCC_name, y = Temissions, fill = GHG)) +
geom_col() +
theme_few() + #remove grid, white background
scale_fill_manual(values = wes_palette("GrandBudapest1")) +
scale_x_discrete(guide = guide_axis(angle = 45)) +
ggtitle("Subsector emissions in 2021") + #add a graph title
xlab("") + ylab("Emissions (kt CO2 eq.)") + #define/change X and Y labels
theme(legend.position="left", legend.box = "vertical",
axis.text.x = element_text(size=8, colour = "black"))
Bar graph showing greenhouse gas emissions in kilotonnes of CO2 equivalents in the agricultural ‘other’ subsector in 2021 by source, colored by greenhouse gas identity.